source('../settings/settings.R')
source('commonFunctions.R')
persons <- SELECTED_SUBJECTS
drive <- 3
inputFile <- str_interp('../data/processed/distancewise/TT1_Drive_${drive}_${distPrev}m_${distNext}m.csv', list(drive=drive, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
outputFile <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=drive, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))

all_Drive3 <- read.csv(inputFile)
all_Drive3$Subject <- as.factor(all_Drive3$Subject)
all_Drive3$logPerspiration <- log(all_Drive3$Perspiration)

Acceleration Plot

computeThreshold <- function(x, xmin=min(x), xmax=max(x)) {
  x <- x[x >= xmin & x <= xmax]
  xArr <- matrix(x, nrow = 1, ncol = length(x))
  threshold <- otsu(xArr, range=c(xmin, xmax))
  return(threshold)
}
computeThresholdForAcceleration <- function(acc){
  return(computeThreshold(acc, xmin=0, xmax=20))
}
findModes <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
acc_thresholds <- vector(mode="list", length=length(persons)) 
names(acc_thresholds) <- persons

acc_sd_thresholds <- vector(mode="list", length=length(persons)) 
names(acc_sd_thresholds) <- persons

speed_sd_thresholds <- vector(mode="list", length=length(persons)) 
names(speed_sd_thresholds) <- persons

for (p in persons) {
  pData <- all_Drive3[(all_Drive3$Subject==as.integer(p) | all_Drive3$Subject==p),]
  acc_th <- computeThresholdForAcceleration(pData$Acceleration)
  acc_th_sd <- computeThresholdForAcceleration(pData$Acc_std)
  speed_th_sd <- computeThresholdForAcceleration(pData$Speed_std)
  acc_thresholds[[p]] <- as.numeric(acc_th)
  acc_sd_thresholds[[p]] <- as.numeric(acc_th_sd)
  speed_sd_thresholds[[p]] <- as.numeric(speed_th_sd)
  
  acc_pl <- ggplot(pData, aes(x=Acceleration)) + geom_density() + geom_vline(aes(xintercept=acc_th), color="blue", linetype="dashed", size=1)
  acc_sd_pl <- ggplot(pData, aes(x=Acc_std)) + geom_density() + geom_vline(aes(xintercept=acc_th_sd), color="red", linetype="dashed", size=1)
  speed_sd_pl <- ggplot(pData, aes(x=Speed_std)) + geom_density() + geom_vline(aes(xintercept=speed_th_sd), color="red", linetype="dashed", size=1)
  
  plot(speed_sd_pl)
  print(paste0(p, " - Threshold (Acc) =", acc_th, " & (Acc SD) =", acc_th_sd, " & (Speed SD) =", acc_th_sd))
  
}
[1] "01 - Threshold (Acc) =8.2421875 & (Acc SD) =5.546875 & (Speed SD) =5.546875"
[1] "02 - Threshold (Acc) =9.9609375 & (Acc SD) =5.8203125 & (Speed SD) =5.8203125"
[1] "03 - Threshold (Acc) =9.1796875 & (Acc SD) =7.0703125 & (Speed SD) =7.0703125"
[1] "04 - Threshold (Acc) =10.1953125 & (Acc SD) =6.40625 & (Speed SD) =6.40625"
[1] "05 - Threshold (Acc) =9.9609375 & (Acc SD) =4.8046875 & (Speed SD) =4.8046875"
[1] "06 - Threshold (Acc) =10.234375 & (Acc SD) =6.4453125 & (Speed SD) =6.4453125"
[1] "07 - Threshold (Acc) =11.6015625 & (Acc SD) =7.3828125 & (Speed SD) =7.3828125"
[1] "09 - Threshold (Acc) =9.296875 & (Acc SD) =6.0546875 & (Speed SD) =6.0546875"
[1] "12 - Threshold (Acc) =9.7265625 & (Acc SD) =7.0703125 & (Speed SD) =7.0703125"
[1] "13 - Threshold (Acc) =10.5859375 & (Acc SD) =7.6171875 & (Speed SD) =7.6171875"
[1] "15 - Threshold (Acc) =9.1796875 & (Acc SD) =6.6796875 & (Speed SD) =6.6796875"
[1] "16 - Threshold (Acc) =9.5703125 & (Acc SD) =8.7109375 & (Speed SD) =8.7109375"
[1] "17 - Threshold (Acc) =9.1796875 & (Acc SD) =6.8359375 & (Speed SD) =6.8359375"
[1] "18 - Threshold (Acc) =10.9375 & (Acc SD) =6.5234375 & (Speed SD) =6.5234375"
[1] "22 - Threshold (Acc) =9.8046875 & (Acc SD) =5.6640625 & (Speed SD) =5.6640625"
[1] "24 - Threshold (Acc) =9.0234375 & (Acc SD) =7.2265625 & (Speed SD) =7.2265625"
[1] "29 - Threshold (Acc) =6.71875 & (Acc SD) =6.2109375 & (Speed SD) =6.2109375"
[1] "30 - Threshold (Acc) =8.90625 & (Acc SD) =6.5234375 & (Speed SD) =6.5234375"
[1] "31 - Threshold (Acc) =11.875 & (Acc SD) =6.2109375 & (Speed SD) =6.2109375"
[1] "32 - Threshold (Acc) =5.3515625 & (Acc SD) =7.1484375 & (Speed SD) =7.1484375"
[1] "41 - Threshold (Acc) =12.2265625 & (Acc SD) =5.8203125 & (Speed SD) =5.8203125"

mean_pp <- vector(mode="list", length=length(persons)) 
names(mean_pp) <- persons

std_pp <- vector(mode="list", length=length(persons)) 
names(std_pp) <- persons

# Mean (Turning)
mean_pp_seg0 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0) <- persons
mean_pp_seg0_1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_1) <- persons
mean_pp_seg0_2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_2) <- persons
mean_pp_seg0_3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_3) <- persons
mean_pp_seg0_4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_4) <- persons

# Mean (Straight)
mean_pp_seg1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg1) <- persons
mean_pp_seg2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg2) <- persons
mean_pp_seg3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg3) <- persons
mean_pp_seg4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg4) <- persons
mean_pp_max <- vector(mode="list", length=length(persons)) 
names(mean_pp_max) <- persons

# SD (Turning)
std_pp_seg0 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0) <- persons
std_pp_seg0_1 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_1) <- persons
std_pp_seg0_2 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_2) <- persons
std_pp_seg0_3 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_3) <- persons
std_pp_seg0_4 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_4) <- persons

# SD (Straight)
std_pp_seg1 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg1) <- persons
std_pp_seg2 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg2) <- persons
std_pp_seg3 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg3) <- persons
std_pp_seg4 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg4) <- persons
std_pp_max <- vector(mode="list", length=length(persons)) 
names(std_pp_max) <- persons

# Acceleration Partitioning
mean_pp_HighAcc <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc) <- persons
mean_pp_LowAcc <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc) <- persons

std_pp_HighAcc <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc) <- persons
std_pp_LowAcc <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc) <- persons

# Accel + Segmentation
mean_pp_HighAcc1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc1) <- persons
mean_pp_LowAcc1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc1) <- persons
std_pp_HighAcc1 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc1) <- persons
std_pp_LowAcc1 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc1) <- persons

mean_pp_HighAcc2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc2) <- persons
mean_pp_LowAcc2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc2) <- persons
std_pp_HighAcc2 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc2) <- persons
std_pp_LowAcc2 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc2) <- persons

mean_pp_HighAcc3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc3) <- persons
mean_pp_LowAcc3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc3) <- persons
std_pp_HighAcc3 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc3) <- persons
std_pp_LowAcc3 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc3) <- persons

mean_pp_HighAcc4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc4) <- persons
mean_pp_LowAcc4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc4) <- persons
std_pp_HighAcc4 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc4) <- persons
std_pp_LowAcc4 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc4) <- persons

for(p in persons) {
  pData <- all_Drive3[(all_Drive3$Subject==as.integer(p) | all_Drive3$Subject==p),]
  pData_act3 <- pData[pData$Activity==3,]
  
  # Data Partitioning
  pData_seg0 <- pData[pData$Phase==0,]
  pData_seg0_1 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 100 & pData$Time < 200,]
  pData_seg0_2 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 200 & pData$Time < 300,]
  pData_seg0_3 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 300 & pData$Time < 400,]
  pData_seg0_4 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 400 & pData$Time < 500,]
  
  pData_seg1 <- pData[pData$Phase==1 & pData$Activity==3 & pData$Time < 110,]
  pData_seg2 <- pData[pData$Phase==2 & pData$Activity==3 & pData$Time < 250,]
  pData_seg3 <- pData[pData$Phase==3 & pData$Activity==3 & pData$Time < 350,]
  pData_seg4 <- pData[pData$Phase==4 & pData$Activity==3,]
  
  # pData_HighAcc <- pData[pData$Acceleration >= acc_thresholds[[p]],]
  # pData_LowAcc <- pData[pData$Acceleration < acc_thresholds[[p]],]
  pData_HighAcc <- pData[pData$Acc_std >= acc_sd_thresholds[[p]] | pData$Speed_std >= speed_sd_thresholds[[p]],]
  pData_LowAcc <- pData[pData$Acc_std < acc_sd_thresholds[[p]] & pData$Speed_std < speed_sd_thresholds[[p]],]
  
  pData_HighAcc1 <- pData_HighAcc[(pData_HighAcc$Distance < 350),]
  pData_LowAcc1 <- pData_LowAcc[(pData_LowAcc$Distance < 350),]
  pData_HighAcc2 <- pData_HighAcc[(pData_HighAcc$Distance >= 350 & pData_HighAcc$Distance < 700),]
  pData_LowAcc2 <- pData_LowAcc[(pData_LowAcc$Distance >= 350 & pData_LowAcc$Distance < 700),]
  pData_HighAcc3 <- pData_HighAcc[(pData_HighAcc$Distance >= 700 & pData_HighAcc$Distance < 1050),]
  pData_LowAcc3 <- pData_LowAcc[(pData_LowAcc$Distance >= 700 & pData_LowAcc$Distance < 1050),]
  pData_HighAcc4 <- pData_HighAcc[(pData_HighAcc$Distance >= 1050),]
  pData_LowAcc4 <- pData_LowAcc[(pData_LowAcc$Distance >= 1050),]
  
  mean_pp[[p]] <- mean(pData_act3$ppLogNormalized)
  std_pp[[p]] <- sd(pData$ppLogNormalized)
  
  mean_pp_seg0[[p]] <- mean(pData_seg0$ppLogNormalized)
  mean_pp_seg0_1[[p]] <- mean(pData_seg0_1$ppLogNormalized)
  mean_pp_seg0_2[[p]] <- mean(pData_seg0_2$ppLogNormalized)
  mean_pp_seg0_3[[p]] <- mean(pData_seg0_3$ppLogNormalized)
  mean_pp_seg0_4[[p]] <- mean(pData_seg0_4$ppLogNormalized)
  
  mean_pp_seg1[[p]] <- mean(pData_seg1$ppLogNormalized)
  mean_pp_seg2[[p]] <- mean(pData_seg2$ppLogNormalized)
  mean_pp_seg3[[p]] <- mean(pData_seg3$ppLogNormalized)
  mean_pp_seg4[[p]] <- mean(pData_seg4$ppLogNormalized)
  mean_pp_max[[p]] <- max(mean_pp_seg1[[p]], mean_pp_seg2[[p]], mean_pp_seg3[[p]], mean_pp_seg4[[p]])
  
  std_pp_seg0[[p]] <- sd(pData_seg0$ppLogNormalized)
  std_pp_seg0_1[[p]] <- sd(pData_seg0_1$ppLogNormalized)
  std_pp_seg0_2[[p]] <- sd(pData_seg0_2$ppLogNormalized)
  std_pp_seg0_3[[p]] <- sd(pData_seg0_3$ppLogNormalized)
  std_pp_seg0_4[[p]] <- sd(pData_seg0_4$ppLogNormalized)
  
  std_pp_seg1[[p]] <- sd(pData_seg1$ppLogNormalized)
  std_pp_seg2[[p]] <- sd(pData_seg2$ppLogNormalized)
  std_pp_seg3[[p]] <- sd(pData_seg3$ppLogNormalized)
  std_pp_seg4[[p]] <- sd(pData_seg4$ppLogNormalized)
  std_pp_max[[p]] <- max(std_pp_seg1[[p]], std_pp_seg2[[p]], std_pp_seg3[[p]], std_pp_seg4[[p]])
  
  # Acceleration
  mean_pp_HighAcc[[p]] <- mean(pData_HighAcc$ppNext)
  mean_pp_LowAcc[[p]] <- mean(pData_LowAcc$ppNext)
  std_pp_HighAcc[[p]] <- sd(pData_HighAcc$ppNext)
  std_pp_LowAcc[[p]] <- sd(pData_LowAcc$ppNext)
  
  mean_pp_HighAcc1[[p]] <- mean(pData_HighAcc1$ppNext)
  mean_pp_LowAcc1[[p]] <- mean(pData_LowAcc1$ppNext)
  std_pp_HighAcc1[[p]] <- sd(pData_HighAcc1$ppNext)
  std_pp_LowAcc1[[p]] <- sd(pData_LowAcc1$ppNext)
  
  mean_pp_HighAcc2[[p]] <- mean(pData_HighAcc2$ppNext)
  mean_pp_LowAcc2[[p]] <- mean(pData_LowAcc2$ppNext)
  std_pp_HighAcc2[[p]] <- sd(pData_HighAcc2$ppNext)
  std_pp_LowAcc2[[p]] <- sd(pData_LowAcc2$ppNext)
  
  mean_pp_HighAcc3[[p]] <- mean(pData_HighAcc3$ppNext)
  mean_pp_LowAcc3[[p]] <- mean(pData_LowAcc3$ppNext)
  std_pp_HighAcc3[[p]] <- sd(pData_HighAcc3$ppNext)
  std_pp_LowAcc3[[p]] <- sd(pData_LowAcc3$ppNext)
  
  mean_pp_HighAcc4[[p]] <- mean(pData_HighAcc4$ppNext)
  mean_pp_LowAcc4[[p]] <- mean(pData_LowAcc4$ppNext)
  std_pp_HighAcc4[[p]] <- sd(pData_HighAcc4$ppNext)
  std_pp_LowAcc4[[p]] <- sd(pData_LowAcc4$ppNext)
}
plt_AllAcc <- vector(mode="list", length=length(persons)) 
names(plt_AllAcc) <- persons

COLOR_ACC = "#02A3C8"
COLOR_PP = "#F28E8E"
COLOR_BRAKE = "#888888"

y1 <- list(
  tickfont = list(color = COLOR_ACC),
  title="Degree",
  range=c(0, max(all_Drive3$Acceleration))
)
y2 <- list(
  tickfont = list(color = COLOR_PP),
  overlaying = "y",
  side = "right",
  title = "Log Perspiration",
  showgrid = FALSE,
  range=c(-0.6, 0.9)
  # range=c(min(all_Drive3$ppLogNormalized), max(all_Drive3$ppLogNormalized))
)

for (p in persons) {
  pData <- all_Drive3[(all_Drive3$Subject==as.integer(p) | all_Drive3$Subject==p),]
  pData_seg0 <- pData[pData$Phase==0,]
  pData_seg1 <- pData[pData$Phase==1 & pData$Activity==3 & pData$Time < 110,]
  pData_seg2 <- pData[pData$Phase==2 & pData$Activity==3 & pData$Time < 250,]
  pData_seg3 <- pData[pData$Phase==3 & pData$Activity==3 & pData$Time < 350,]
  pData_seg4 <- pData[pData$Phase==4 & pData$Activity==3,]
  
  plot_Acc <- plot_ly(pData, x = ~Time, height=400, width=900) %>%
    # add_trace(name="Acceleration", y = ~Acceleration, type = 'scatter', mode = 'lines', line=list(width=1.5, color=COLOR_ACC)) %>% 
    add_trace(name="PP", y = ~ppLogNormalized, type = 'scatter', mode = 'lines', line=list(width=1.5, color=COLOR_PP), yaxis = "y2") %>%
    add_segments(x = min(pData$Time), xend = max(pData$Time), y = mean_pp[[p]], yend = mean_pp[[p]], 
                           yaxis = "y2", name="Avg. PP (straight)",
                           line=list(color="darkgray", dash = 'dot')) %>%
    add_segments(x = min(pData$Time), xend = max(pData$Time), y = mean_pp_seg0[[p]], yend = mean_pp_seg0[[p]], 
                           yaxis = "y2", name="Avg. PP (turning)",
                           line=list(color="black", dash = 'dot')) %>%
    add_segments(x = min(pData_seg1$Time), xend = max(pData_seg1$Time), y = mean_pp_seg1[[p]], yend = mean_pp_seg1[[p]], 
                           yaxis = "y2", name="Avg. PP (1st part)",
                           line=list(color="red", dash = 'dot')) %>%
    add_segments(x = min(pData_seg2$Time), xend = max(pData_seg2$Time), y = mean_pp_seg2[[p]], yend = mean_pp_seg2[[p]], 
                           yaxis = "y2", name="Avg. PP (2nd part)",
                           line=list(color="green", dash = 'dot')) %>%
    add_segments(x = min(pData_seg3$Time), xend = max(pData_seg3$Time), y = mean_pp_seg3[[p]], yend = mean_pp_seg3[[p]], 
                           yaxis = "y2", name="Avg. PP (3rd part)",
                           line=list(color="blue", dash = 'dot')) %>%
    add_segments(x = min(pData_seg4$Time), xend = max(pData_seg4$Time), y = mean_pp_seg4[[p]], yend = mean_pp_seg4[[p]], 
                           yaxis = "y2", name="Avg. PP (4th part)",
                           line=list(color="purple", dash = 'dot')) %>%
    layout(
      title=paste0("Subject #", p), 
      xaxis=list(title="Time [s]", range=c(0)), 
      yaxis=y1, 
      yaxis2=y2, 
      margin = list(l = 50, r = 50, b = 50, t = 50, pad = 4),
      legend = list(x = 0.5, xanchor = "center", y = 0.2, bgcolor = "rgba(0,0,0,0)", title="Metric", orientation = "h"),
      autosize = F
    )
  
  plt_AllAcc[[p]] <- plot_Acc
}


htmltools::tagList(plt_AllAcc)
NUMBER_OF_CLUSTERS = 3

color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- c("blue", "darkred", color_darkpink, "black")[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- c("blue", "darkred", color_darkpink, "black")[1:NUMBER_OF_CLUSTERS]

dfPP <- as.data.frame(cbind(
                            unlist(mean_pp), 
                            unlist(std_pp), 
                            unlist(mean_pp_seg0), 
                            unlist(mean_pp_seg1), 
                            unlist(mean_pp_seg2), 
                            unlist(mean_pp_seg3), 
                            unlist(mean_pp_seg4),
                            unlist(mean_pp_max),
                            unlist(std_pp_seg0), 
                            unlist(std_pp_seg1), 
                            unlist(std_pp_seg2), 
                            unlist(std_pp_seg3), 
                            unlist(std_pp_seg4),
                            unlist(std_pp_max),
                            unlist(mean_pp_seg0_1),
                            unlist(mean_pp_seg0_2),
                            unlist(mean_pp_seg0_3),
                            unlist(mean_pp_seg0_4),
                            unlist(std_pp_seg0_1),
                            unlist(std_pp_seg0_2),
                            unlist(std_pp_seg0_3),
                            unlist(std_pp_seg0_4),
                            unlist(mean_pp_HighAcc),
                            unlist(mean_pp_LowAcc),
                            unlist(std_pp_HighAcc),
                            unlist(std_pp_LowAcc),
                            unlist(mean_pp_HighAcc1),
                            unlist(mean_pp_LowAcc1),
                            unlist(std_pp_HighAcc1),
                            unlist(std_pp_LowAcc1),
                            unlist(mean_pp_HighAcc2),
                            unlist(mean_pp_LowAcc2),
                            unlist(std_pp_HighAcc2),
                            unlist(std_pp_LowAcc2),
                            unlist(mean_pp_HighAcc3),
                            unlist(mean_pp_LowAcc3),
                            unlist(std_pp_HighAcc3),
                            unlist(std_pp_LowAcc3),
                            unlist(mean_pp_HighAcc4),
                            unlist(mean_pp_LowAcc4),
                            unlist(std_pp_HighAcc4),
                            unlist(std_pp_LowAcc4)
                    ))

names(dfPP) <- c("MeanPP", "StdPP", 
                 "MeanPP_Seg0", "MeanPP_Seg1", "MeanPP_Seg2", "MeanPP_Seg3", "MeanPP_Seg4", "MeanPP_SegMax",
                 "StdPP_Seg0", "StdPP_Seg1", "StdPP_Seg2", "StdPP_Seg3", "StdPP_Seg4", "StdPP_SegMax",
                 "MeanPP_Seg0_1", "MeanPP_Seg0_2", "MeanPP_Seg0_3", "MeanPP_Seg0_4",
                 "StdPP_Seg0_1", "StdPP_Seg0_2", "StdPP_Seg0_3", "StdPP_Seg0_4",
                 "MeanPP_AccHigh", " MeanPP_AccLow", "StdPP_AccHigh", "StdPP_AccLow",
                 "MeanPP_AccHigh1", " MeanPP_AccLow1", "StdPP_AccHigh1", "StdPP_AccLow1",
                 "MeanPP_AccHigh2", " MeanPP_AccLow2", "StdPP_AccHigh2", "StdPP_AccLow2",
                 "MeanPP_AccHigh3", " MeanPP_AccLow3", "StdPP_AccHigh3", "StdPP_AccLow3",
                 "MeanPP_AccHigh4", " MeanPP_AccLow4", "StdPP_AccHigh4", "StdPP_AccLow4")

behavioralMatrixClustering <- as.matrix(dfPP)

distMatrix <- dist(behavioralMatrixClustering)
hresults <- distMatrix %>% hclust(method="average")

hc <- hresults %>% 
      as.dendrogram %>%
      set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
      set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
      # set("leaves_pch", 19) %>%
      # set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%    
      set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)

plot(hc)
legend("topright", 
     title="Drive=Motoric \nHierachical Clustering",
     legend = c("Group 1", "Group 2", "Group 3"), 
     col = c("darkred", "pink" , "blue"),
     pch = c(20,20,20), bty = "n",  pt.cex = 1.5, cex = 0.8 , 
     text.col = "black", horiz = FALSE, inset = c(0.4, 0.1))

Clustering with SD of PP

library(dendextend)

clusteringDf <- dfPP %>% select(StdPP)
NUMBER_OF_CLUSTERS = 3

color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- c("darkred", "blue", "red")[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- c("darkred", "blue", "red")[1:NUMBER_OF_CLUSTERS]

behavioralMatrixClustering <- as.matrix(clusteringDf)
rownames(behavioralMatrixClustering) <- paste0("#", persons)
distMatrix <- dist(behavioralMatrixClustering)
hresults <- distMatrix %>% hclust

hc <- hresults %>% 
      as.dendrogram %>%
      set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
      set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
      # set("leaves_pch", 19) %>%
      # set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%    
      set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)

plot(hc)
legend("topright", 
     title="Drive=Motoric \nChange of Arousal",
     legend = c("Exceptional SD" , "Low SD" , "High SD"), 
     col = c("darkred", "blue" , "red"),
     pch = c(20,20,20), bty = "n",  pt.cex = 1.5, cex = 0.8 , 
     text.col = "black", horiz = FALSE, inset = c(0.0, 0.1))

library(cluster)
silhouette_score <- function(k){
  km <- kmeans(clusteringDf, centers = k, nstart=25)
  ss <- silhouette(km$cluster, dist(clusteringDf))
  mean(ss[, 3])
}
k <- 2:10
avg_sil <- sapply(k, silhouette_score)
plot(k, type='b', avg_sil, xlab='Number of clusters', ylab='Average Silhouette Scores', frame=FALSE)

# Store clustering data
dfx <- dfPP
dfx <- cbind(persons, dfx)
names(dfx) <- c("Subject" , names(dfPP))
write.csv(dfx, outputFile, row.names = F)

Linear Model

sampledData <- getSampleSegmentedData(NA, all_Drive3, window=WINDOW_TIME)
linearModelOnline <- lmer(ppNext ~ 
              (1 | Subject)
              + Speed_u
              + Speed_std
              + Acc_u
              + Acc_std
              + Brake_u
              + Brake_std
              + Steering_u
              + Steering_std, 
            data=sampledData, REML = T)

# lmer(ppLogNormalized ~ (1 | Subject) + Speed_u + Speed_std + Acc_u + Acc_std + Brake_u + Brake_std + Steering_u + Steering_std + HR + BR, data = pData, REML = T)

# anova(model)
summary(linearModelOnline)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: ppNext ~ (1 | Subject) + Speed_u + Speed_std + Acc_u + Acc_std +      Brake_u + Brake_std + Steering_u + Steering_std
   Data: sampledData

REML criterion at convergence: -10642.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.3473 -0.5617 -0.0330  0.5213  4.2677 

Random effects:
 Groups   Name        Variance Std.Dev.
 Subject  (Intercept) 0.010554 0.10273 
 Residual             0.002355 0.04853 
Number of obs: 3393, groups:  Subject, 21

Fixed effects:
               Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)   1.946e-01  2.442e-02  2.808e+01   7.970 1.09e-08 ***
Speed_u      -4.517e-03  3.090e-04  3.367e+03 -14.621  < 2e-16 ***
Speed_std    -7.270e-03  9.640e-04  3.367e+03  -7.542 5.92e-14 ***
Acc_u         1.065e-03  3.933e-04  3.372e+03   2.708   0.0068 ** 
Acc_std       2.798e-03  3.527e-04  3.365e+03   7.933 2.88e-15 ***
Brake_u       2.307e-03  5.067e-04  3.364e+03   4.552 5.49e-06 ***
Brake_std    -4.774e-04  5.281e-04  3.365e+03  -0.904   0.3661    
Steering_u   -1.108e-05  1.070e-05  3.365e+03  -1.036   0.3005    
Steering_std -3.197e-05  2.973e-05  3.365e+03  -1.076   0.2822    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) Speed_ Spd_st Acc_u  Acc_st Brake_ Brk_st Sterng_
Speed_u     -0.318                                                  
Speed_std   -0.071  0.029                                           
Acc_u       -0.122 -0.281  0.027                                    
Acc_std     -0.126  0.217 -0.044 -0.042                             
Brake_u     -0.144  0.255 -0.064  0.188  0.189                      
Brake_std    0.032  0.137 -0.069 -0.255 -0.427 -0.755               
Steering_u  -0.054  0.153  0.069 -0.040  0.017  0.126 -0.025        
Steerng_std -0.266  0.527 -0.088  0.227  0.449  0.293 -0.360 -0.097 
plot(linearModelOnline)

---
title: "R Notebook"
output: html_notebook
---

```{r}
source('../settings/settings.R')
source('commonFunctions.R')
```

```{r}
persons <- SELECTED_SUBJECTS
drive <- 3
inputFile <- str_interp('../data/processed/distancewise/TT1_Drive_${drive}_${distPrev}m_${distNext}m.csv', list(drive=drive, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
outputFile <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=drive, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))

all_Drive3 <- read.csv(inputFile)
all_Drive3$Subject <- as.factor(all_Drive3$Subject)
all_Drive3$logPerspiration <- log(all_Drive3$Perspiration)
```

# Acceleration Plot
```{r}
computeThreshold <- function(x, xmin=min(x), xmax=max(x)) {
  x <- x[x >= xmin & x <= xmax]
  xArr <- matrix(x, nrow = 1, ncol = length(x))
  threshold <- otsu(xArr, range=c(xmin, xmax))
  return(threshold)
}
computeThresholdForAcceleration <- function(acc){
  return(computeThreshold(acc, xmin=0, xmax=20))
}
findModes <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
```

```{r}
acc_thresholds <- vector(mode="list", length=length(persons)) 
names(acc_thresholds) <- persons

acc_sd_thresholds <- vector(mode="list", length=length(persons)) 
names(acc_sd_thresholds) <- persons

speed_sd_thresholds <- vector(mode="list", length=length(persons)) 
names(speed_sd_thresholds) <- persons

for (p in persons) {
  pData <- all_Drive3[(all_Drive3$Subject==as.integer(p) | all_Drive3$Subject==p),]
  acc_th <- computeThresholdForAcceleration(pData$Acceleration)
  acc_th_sd <- computeThresholdForAcceleration(pData$Acc_std)
  speed_th_sd <- computeThresholdForAcceleration(pData$Speed_std)
  acc_thresholds[[p]] <- as.numeric(acc_th)
  acc_sd_thresholds[[p]] <- as.numeric(acc_th_sd)
  speed_sd_thresholds[[p]] <- as.numeric(speed_th_sd)
  
  acc_pl <- ggplot(pData, aes(x=Acceleration)) + geom_density() + geom_vline(aes(xintercept=acc_th), color="blue", linetype="dashed", size=1)
  acc_sd_pl <- ggplot(pData, aes(x=Acc_std)) + geom_density() + geom_vline(aes(xintercept=acc_th_sd), color="red", linetype="dashed", size=1)
  speed_sd_pl <- ggplot(pData, aes(x=Speed_std)) + geom_density() + geom_vline(aes(xintercept=speed_th_sd), color="red", linetype="dashed", size=1)
  
  plot(speed_sd_pl)
  print(paste0(p, " - Threshold (Acc) =", acc_th, " & (Acc SD) =", acc_th_sd, " & (Speed SD) =", acc_th_sd))
  
}
```

```{r}
mean_pp <- vector(mode="list", length=length(persons)) 
names(mean_pp) <- persons

std_pp <- vector(mode="list", length=length(persons)) 
names(std_pp) <- persons

# Mean (Turning)
mean_pp_seg0 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0) <- persons
mean_pp_seg0_1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_1) <- persons
mean_pp_seg0_2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_2) <- persons
mean_pp_seg0_3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_3) <- persons
mean_pp_seg0_4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg0_4) <- persons

# Mean (Straight)
mean_pp_seg1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg1) <- persons
mean_pp_seg2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg2) <- persons
mean_pp_seg3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg3) <- persons
mean_pp_seg4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_seg4) <- persons
mean_pp_max <- vector(mode="list", length=length(persons)) 
names(mean_pp_max) <- persons

# SD (Turning)
std_pp_seg0 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0) <- persons
std_pp_seg0_1 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_1) <- persons
std_pp_seg0_2 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_2) <- persons
std_pp_seg0_3 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_3) <- persons
std_pp_seg0_4 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg0_4) <- persons

# SD (Straight)
std_pp_seg1 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg1) <- persons
std_pp_seg2 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg2) <- persons
std_pp_seg3 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg3) <- persons
std_pp_seg4 <- vector(mode="list", length=length(persons)) 
names(std_pp_seg4) <- persons
std_pp_max <- vector(mode="list", length=length(persons)) 
names(std_pp_max) <- persons

# Acceleration Partitioning
mean_pp_HighAcc <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc) <- persons
mean_pp_LowAcc <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc) <- persons

std_pp_HighAcc <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc) <- persons
std_pp_LowAcc <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc) <- persons

# Accel + Segmentation
mean_pp_HighAcc1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc1) <- persons
mean_pp_LowAcc1 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc1) <- persons
std_pp_HighAcc1 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc1) <- persons
std_pp_LowAcc1 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc1) <- persons

mean_pp_HighAcc2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc2) <- persons
mean_pp_LowAcc2 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc2) <- persons
std_pp_HighAcc2 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc2) <- persons
std_pp_LowAcc2 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc2) <- persons

mean_pp_HighAcc3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc3) <- persons
mean_pp_LowAcc3 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc3) <- persons
std_pp_HighAcc3 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc3) <- persons
std_pp_LowAcc3 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc3) <- persons

mean_pp_HighAcc4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_HighAcc4) <- persons
mean_pp_LowAcc4 <- vector(mode="list", length=length(persons)) 
names(mean_pp_LowAcc4) <- persons
std_pp_HighAcc4 <- vector(mode="list", length=length(persons)) 
names(std_pp_HighAcc4) <- persons
std_pp_LowAcc4 <- vector(mode="list", length=length(persons)) 
names(std_pp_LowAcc4) <- persons

for(p in persons) {
  pData <- all_Drive3[(all_Drive3$Subject==as.integer(p) | all_Drive3$Subject==p),]
  pData_act3 <- pData[pData$Activity==3,]
  
  # Data Partitioning
  pData_seg0 <- pData[pData$Phase==0,]
  pData_seg0_1 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 100 & pData$Time < 200,]
  pData_seg0_2 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 200 & pData$Time < 300,]
  pData_seg0_3 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 300 & pData$Time < 400,]
  pData_seg0_4 <- pData[pData$Activity==0 & pData$Phase==0 & pData$Time > 400 & pData$Time < 500,]
  
  pData_seg1 <- pData[pData$Phase==1 & pData$Activity==3 & pData$Time < 110,]
  pData_seg2 <- pData[pData$Phase==2 & pData$Activity==3 & pData$Time < 250,]
  pData_seg3 <- pData[pData$Phase==3 & pData$Activity==3 & pData$Time < 350,]
  pData_seg4 <- pData[pData$Phase==4 & pData$Activity==3,]
  
  # pData_HighAcc <- pData[pData$Acceleration >= acc_thresholds[[p]],]
  # pData_LowAcc <- pData[pData$Acceleration < acc_thresholds[[p]],]
  pData_HighAcc <- pData[pData$Acc_std >= acc_sd_thresholds[[p]] | pData$Speed_std >= speed_sd_thresholds[[p]],]
  pData_LowAcc <- pData[pData$Acc_std < acc_sd_thresholds[[p]] & pData$Speed_std < speed_sd_thresholds[[p]],]
  
  pData_HighAcc1 <- pData_HighAcc[(pData_HighAcc$Distance < 350),]
  pData_LowAcc1 <- pData_LowAcc[(pData_LowAcc$Distance < 350),]
  pData_HighAcc2 <- pData_HighAcc[(pData_HighAcc$Distance >= 350 & pData_HighAcc$Distance < 700),]
  pData_LowAcc2 <- pData_LowAcc[(pData_LowAcc$Distance >= 350 & pData_LowAcc$Distance < 700),]
  pData_HighAcc3 <- pData_HighAcc[(pData_HighAcc$Distance >= 700 & pData_HighAcc$Distance < 1050),]
  pData_LowAcc3 <- pData_LowAcc[(pData_LowAcc$Distance >= 700 & pData_LowAcc$Distance < 1050),]
  pData_HighAcc4 <- pData_HighAcc[(pData_HighAcc$Distance >= 1050),]
  pData_LowAcc4 <- pData_LowAcc[(pData_LowAcc$Distance >= 1050),]
  
  mean_pp[[p]] <- mean(pData_act3$ppLogNormalized)
  std_pp[[p]] <- sd(pData$ppLogNormalized)
  
  mean_pp_seg0[[p]] <- mean(pData_seg0$ppLogNormalized)
  mean_pp_seg0_1[[p]] <- mean(pData_seg0_1$ppLogNormalized)
  mean_pp_seg0_2[[p]] <- mean(pData_seg0_2$ppLogNormalized)
  mean_pp_seg0_3[[p]] <- mean(pData_seg0_3$ppLogNormalized)
  mean_pp_seg0_4[[p]] <- mean(pData_seg0_4$ppLogNormalized)
  
  mean_pp_seg1[[p]] <- mean(pData_seg1$ppLogNormalized)
  mean_pp_seg2[[p]] <- mean(pData_seg2$ppLogNormalized)
  mean_pp_seg3[[p]] <- mean(pData_seg3$ppLogNormalized)
  mean_pp_seg4[[p]] <- mean(pData_seg4$ppLogNormalized)
  mean_pp_max[[p]] <- max(mean_pp_seg1[[p]], mean_pp_seg2[[p]], mean_pp_seg3[[p]], mean_pp_seg4[[p]])
  
  std_pp_seg0[[p]] <- sd(pData_seg0$ppLogNormalized)
  std_pp_seg0_1[[p]] <- sd(pData_seg0_1$ppLogNormalized)
  std_pp_seg0_2[[p]] <- sd(pData_seg0_2$ppLogNormalized)
  std_pp_seg0_3[[p]] <- sd(pData_seg0_3$ppLogNormalized)
  std_pp_seg0_4[[p]] <- sd(pData_seg0_4$ppLogNormalized)
  
  std_pp_seg1[[p]] <- sd(pData_seg1$ppLogNormalized)
  std_pp_seg2[[p]] <- sd(pData_seg2$ppLogNormalized)
  std_pp_seg3[[p]] <- sd(pData_seg3$ppLogNormalized)
  std_pp_seg4[[p]] <- sd(pData_seg4$ppLogNormalized)
  std_pp_max[[p]] <- max(std_pp_seg1[[p]], std_pp_seg2[[p]], std_pp_seg3[[p]], std_pp_seg4[[p]])
  
  # Acceleration
  mean_pp_HighAcc[[p]] <- mean(pData_HighAcc$ppNext)
  mean_pp_LowAcc[[p]] <- mean(pData_LowAcc$ppNext)
  std_pp_HighAcc[[p]] <- sd(pData_HighAcc$ppNext)
  std_pp_LowAcc[[p]] <- sd(pData_LowAcc$ppNext)
  
  mean_pp_HighAcc1[[p]] <- mean(pData_HighAcc1$ppNext)
  mean_pp_LowAcc1[[p]] <- mean(pData_LowAcc1$ppNext)
  std_pp_HighAcc1[[p]] <- sd(pData_HighAcc1$ppNext)
  std_pp_LowAcc1[[p]] <- sd(pData_LowAcc1$ppNext)
  
  mean_pp_HighAcc2[[p]] <- mean(pData_HighAcc2$ppNext)
  mean_pp_LowAcc2[[p]] <- mean(pData_LowAcc2$ppNext)
  std_pp_HighAcc2[[p]] <- sd(pData_HighAcc2$ppNext)
  std_pp_LowAcc2[[p]] <- sd(pData_LowAcc2$ppNext)
  
  mean_pp_HighAcc3[[p]] <- mean(pData_HighAcc3$ppNext)
  mean_pp_LowAcc3[[p]] <- mean(pData_LowAcc3$ppNext)
  std_pp_HighAcc3[[p]] <- sd(pData_HighAcc3$ppNext)
  std_pp_LowAcc3[[p]] <- sd(pData_LowAcc3$ppNext)
  
  mean_pp_HighAcc4[[p]] <- mean(pData_HighAcc4$ppNext)
  mean_pp_LowAcc4[[p]] <- mean(pData_LowAcc4$ppNext)
  std_pp_HighAcc4[[p]] <- sd(pData_HighAcc4$ppNext)
  std_pp_LowAcc4[[p]] <- sd(pData_LowAcc4$ppNext)
}

```

```{r}
plt_AllAcc <- vector(mode="list", length=length(persons)) 
names(plt_AllAcc) <- persons

COLOR_ACC = "#02A3C8"
COLOR_PP = "#F28E8E"
COLOR_BRAKE = "#888888"

y1 <- list(
  tickfont = list(color = COLOR_ACC),
  title="Degree",
  range=c(0, max(all_Drive3$Acceleration))
)
y2 <- list(
  tickfont = list(color = COLOR_PP),
  overlaying = "y",
  side = "right",
  title = "Log Perspiration",
  showgrid = FALSE,
  range=c(-0.6, 0.9)
  # range=c(min(all_Drive3$ppLogNormalized), max(all_Drive3$ppLogNormalized))
)

for (p in persons) {
  pData <- all_Drive3[(all_Drive3$Subject==as.integer(p) | all_Drive3$Subject==p),]
  pData_seg0 <- pData[pData$Phase==0,]
  pData_seg1 <- pData[pData$Phase==1 & pData$Activity==3 & pData$Time < 110,]
  pData_seg2 <- pData[pData$Phase==2 & pData$Activity==3 & pData$Time < 250,]
  pData_seg3 <- pData[pData$Phase==3 & pData$Activity==3 & pData$Time < 350,]
  pData_seg4 <- pData[pData$Phase==4 & pData$Activity==3,]
  
  plot_Acc <- plot_ly(pData, x = ~Time, height=400, width=900) %>%
    # add_trace(name="Acceleration", y = ~Acceleration, type = 'scatter', mode = 'lines', line=list(width=1.5, color=COLOR_ACC)) %>% 
    add_trace(name="PP", y = ~ppLogNormalized, type = 'scatter', mode = 'lines', line=list(width=1.5, color=COLOR_PP), yaxis = "y2") %>%
    add_segments(x = min(pData$Time), xend = max(pData$Time), y = mean_pp[[p]], yend = mean_pp[[p]], 
                           yaxis = "y2", name="Avg. PP (straight)",
                           line=list(color="darkgray", dash = 'dot')) %>%
    add_segments(x = min(pData$Time), xend = max(pData$Time), y = mean_pp_seg0[[p]], yend = mean_pp_seg0[[p]], 
                           yaxis = "y2", name="Avg. PP (turning)",
                           line=list(color="black", dash = 'dot')) %>%
    add_segments(x = min(pData_seg1$Time), xend = max(pData_seg1$Time), y = mean_pp_seg1[[p]], yend = mean_pp_seg1[[p]], 
                           yaxis = "y2", name="Avg. PP (1st part)",
                           line=list(color="red", dash = 'dot')) %>%
    add_segments(x = min(pData_seg2$Time), xend = max(pData_seg2$Time), y = mean_pp_seg2[[p]], yend = mean_pp_seg2[[p]], 
                           yaxis = "y2", name="Avg. PP (2nd part)",
                           line=list(color="green", dash = 'dot')) %>%
    add_segments(x = min(pData_seg3$Time), xend = max(pData_seg3$Time), y = mean_pp_seg3[[p]], yend = mean_pp_seg3[[p]], 
                           yaxis = "y2", name="Avg. PP (3rd part)",
                           line=list(color="blue", dash = 'dot')) %>%
    add_segments(x = min(pData_seg4$Time), xend = max(pData_seg4$Time), y = mean_pp_seg4[[p]], yend = mean_pp_seg4[[p]], 
                           yaxis = "y2", name="Avg. PP (4th part)",
                           line=list(color="purple", dash = 'dot')) %>%
    layout(
      title=paste0("Subject #", p), 
      xaxis=list(title="Time [s]", range=c(0)), 
      yaxis=y1, 
      yaxis2=y2, 
      margin = list(l = 50, r = 50, b = 50, t = 50, pad = 4),
      legend = list(x = 0.5, xanchor = "center", y = 0.2, bgcolor = "rgba(0,0,0,0)", title="Metric", orientation = "h"),
      autosize = F
    )
  
  plt_AllAcc[[p]] <- plot_Acc
}


htmltools::tagList(plt_AllAcc)
```


```{r}
NUMBER_OF_CLUSTERS = 3

color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- c("blue", "darkred", color_darkpink, "black")[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- c("blue", "darkred", color_darkpink, "black")[1:NUMBER_OF_CLUSTERS]

dfPP <- as.data.frame(cbind(
                            unlist(mean_pp), 
                            unlist(std_pp), 
                            unlist(mean_pp_seg0), 
                            unlist(mean_pp_seg1), 
                            unlist(mean_pp_seg2), 
                            unlist(mean_pp_seg3), 
                            unlist(mean_pp_seg4),
                            unlist(mean_pp_max),
                            unlist(std_pp_seg0), 
                            unlist(std_pp_seg1), 
                            unlist(std_pp_seg2), 
                            unlist(std_pp_seg3), 
                            unlist(std_pp_seg4),
                            unlist(std_pp_max),
                            unlist(mean_pp_seg0_1),
                            unlist(mean_pp_seg0_2),
                            unlist(mean_pp_seg0_3),
                            unlist(mean_pp_seg0_4),
                            unlist(std_pp_seg0_1),
                            unlist(std_pp_seg0_2),
                            unlist(std_pp_seg0_3),
                            unlist(std_pp_seg0_4),
                            unlist(mean_pp_HighAcc),
                            unlist(mean_pp_LowAcc),
                            unlist(std_pp_HighAcc),
                            unlist(std_pp_LowAcc),
                            unlist(mean_pp_HighAcc1),
                            unlist(mean_pp_LowAcc1),
                            unlist(std_pp_HighAcc1),
                            unlist(std_pp_LowAcc1),
                            unlist(mean_pp_HighAcc2),
                            unlist(mean_pp_LowAcc2),
                            unlist(std_pp_HighAcc2),
                            unlist(std_pp_LowAcc2),
                            unlist(mean_pp_HighAcc3),
                            unlist(mean_pp_LowAcc3),
                            unlist(std_pp_HighAcc3),
                            unlist(std_pp_LowAcc3),
                            unlist(mean_pp_HighAcc4),
                            unlist(mean_pp_LowAcc4),
                            unlist(std_pp_HighAcc4),
                            unlist(std_pp_LowAcc4)
                    ))

names(dfPP) <- c("MeanPP", "StdPP", 
                 "MeanPP_Seg0", "MeanPP_Seg1", "MeanPP_Seg2", "MeanPP_Seg3", "MeanPP_Seg4", "MeanPP_SegMax",
                 "StdPP_Seg0", "StdPP_Seg1", "StdPP_Seg2", "StdPP_Seg3", "StdPP_Seg4", "StdPP_SegMax",
                 "MeanPP_Seg0_1", "MeanPP_Seg0_2", "MeanPP_Seg0_3", "MeanPP_Seg0_4",
                 "StdPP_Seg0_1", "StdPP_Seg0_2", "StdPP_Seg0_3", "StdPP_Seg0_4",
                 "MeanPP_AccHigh", " MeanPP_AccLow", "StdPP_AccHigh", "StdPP_AccLow",
                 "MeanPP_AccHigh1", " MeanPP_AccLow1", "StdPP_AccHigh1", "StdPP_AccLow1",
                 "MeanPP_AccHigh2", " MeanPP_AccLow2", "StdPP_AccHigh2", "StdPP_AccLow2",
                 "MeanPP_AccHigh3", " MeanPP_AccLow3", "StdPP_AccHigh3", "StdPP_AccLow3",
                 "MeanPP_AccHigh4", " MeanPP_AccLow4", "StdPP_AccHigh4", "StdPP_AccLow4")

behavioralMatrixClustering <- as.matrix(dfPP)

distMatrix <- dist(behavioralMatrixClustering)
hresults <- distMatrix %>% hclust(method="average")

hc <- hresults %>% 
      as.dendrogram %>%
      set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
      set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
      # set("leaves_pch", 19) %>%
      # set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%    
      set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)

plot(hc)
legend("topright", 
     title="Drive=Motoric \nHierachical Clustering",
     legend = c("Group 1", "Group 2", "Group 3"), 
     col = c("darkred", "pink" , "blue"),
     pch = c(20,20,20), bty = "n",  pt.cex = 1.5, cex = 0.8 , 
     text.col = "black", horiz = FALSE, inset = c(0.4, 0.1))
```

# Clustering with SD of PP
```{r}
library(dendextend)

clusteringDf <- dfPP %>% select(StdPP)
NUMBER_OF_CLUSTERS = 3

color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- c("darkred", "blue", "red")[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- c("darkred", "blue", "red")[1:NUMBER_OF_CLUSTERS]

behavioralMatrixClustering <- as.matrix(clusteringDf)
rownames(behavioralMatrixClustering) <- paste0("#", persons)
distMatrix <- dist(behavioralMatrixClustering)
hresults <- distMatrix %>% hclust

hc <- hresults %>% 
      as.dendrogram %>%
      set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
      set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
      # set("leaves_pch", 19) %>%
      # set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%    
      set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)

plot(hc)
legend("topright", 
     title="Drive=Motoric \nChange of Arousal",
     legend = c("Exceptional SD" , "Low SD" , "High SD"), 
     col = c("darkred", "blue" , "red"),
     pch = c(20,20,20), bty = "n",  pt.cex = 1.5, cex = 0.8 , 
     text.col = "black", horiz = FALSE, inset = c(0.0, 0.1))
```

```{r}
library(cluster)
silhouette_score <- function(k){
  km <- kmeans(clusteringDf, centers = k, nstart=25)
  ss <- silhouette(km$cluster, dist(clusteringDf))
  mean(ss[, 3])
}
k <- 2:10
avg_sil <- sapply(k, silhouette_score)
plot(k, type='b', avg_sil, xlab='Number of clusters', ylab='Average Silhouette Scores', frame=FALSE)
```

```{r}
# Store clustering data
dfx <- dfPP
dfx <- cbind(persons, dfx)
names(dfx) <- c("Subject" , names(dfPP))
write.csv(dfx, outputFile, row.names = F)
```

## Linear Model
```{r}
sampledData <- getSampleSegmentedData(NA, all_Drive3, window=WINDOW_TIME)
linearModelOnline <- lmer(ppNext ~ 
              (1 | Subject)
              + Speed_u
              + Speed_std
              + Acc_u
              + Acc_std
              + Brake_u
              + Brake_std
              + Steering_u
              + Steering_std, 
            data=sampledData, REML = T)

# lmer(ppLogNormalized ~ (1 | Subject) + Speed_u + Speed_std + Acc_u + Acc_std + Brake_u + Brake_std + Steering_u + Steering_std + HR + BR, data = pData, REML = T)

# anova(model)
summary(linearModelOnline)
plot(linearModelOnline)
```